1 Working example SpillOver

-The code use several datasets EMPRES-I (FAO), ECDC Surveillance Atlas ECDC, Global Health observatory (WHO). Adagio. -Please feel free to analyse the code and run from your terminal (there is a folder with the code and data on: DG1 Disease occurrence > datasources > Spill-over > sources > spillover.Rmd)

2 Libraries

# Spill over analysis
# Update: 17/01/2024
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(stringr)
library(readr)
library(data.table)
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## 
## The following object is masked from 'package:purrr':
## 
##     transpose
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(DT)

3 Working directory

setwd("C:/Users/alfredo.acosta/SVA/LiRA_consortium - Documents/WG1 Disease occurrence/datasources/Spill-over/sources")

4 Loading ECDC Surveillance atlas

# West Nile 
soe <- read.csv(file="ECDC_surveillance_data_West_Nile_virus_infection.csv")
# Rabies
soe1 <- read.csv(file="ECDC_surveillance_data_Rabies.csv")
# Crimean congo
soe2 <- read.csv(file="ECDC_surveillance_data_Crimean-Congo_haemorrhagic_fever.csv")
# Rift Valley
soe3 <- read.csv(file="ECDC_surveillance_data_Rift_valley_fever.csv")
# Influenza
# soe4 <- read.csv(file="ECDC_surveillance_data_Influenza.csv")

soe <- rbind(soe, soe1, soe2, soe3)
rm(soe1, soe2, soe3)

soe1 <- soe %>% 
    group_by(HealthTopic, RegionName,Time) %>%
    summarise(cases=sum(NumValue)) %>% 
  filter(cases != 0)
## `summarise()` has grouped output by 'HealthTopic', 'RegionName'. You can
## override using the `.groups` argument.
colnames(soe1) <- c("disease", "country", "time", "cases")

5 Descriptive

# Number of human cases
soe %>% 
  filter(!str_detect(RegionName, "EU")) %>% 
    group_by(HealthTopic) %>%
  summarise(cases=sum(NumValue)) %>% 
  filter(cases != 0)
## # A tibble: 4 × 2
##   HealthTopic                      cases
##   <chr>                            <dbl>
## 1 Crimean-Congo haemorrhagic fever    87
## 2 Rabies                              25
## 3 Rift valley fever                   17
## 4 West Nile virus infection         6866
# Number of countries
soe %>% 
  filter(!str_detect(RegionName, "EU")) %>% 
    group_by(HealthTopic) %>%
  summarise(countries=length(unique(RegionName)))
## # A tibble: 4 × 2
##   HealthTopic                      countries
##   <chr>                                <int>
## 1 Crimean-Congo haemorrhagic fever        29
## 2 Rabies                                  31
## 3 Rift valley fever                       24
## 4 West Nile virus infection               36
table(soe$Time)
## 
## 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 
##   10   10   16   44   48  114  114  115  117  125  127  127  128  128  130  131 
## 2019 2020 2021 2022 
##  131  124  126  128
table(soe$Time, soe$HealthTopic)
##       
##        Crimean-Congo haemorrhagic fever Rabies Rift valley fever
##   2003                                5      0                 5
##   2004                                5      0                 5
##   2005                                5      6                 5
##   2006                                7     31                 6
##   2007                                9     33                 6
##   2008                               29     34                22
##   2009                               30     33                23
##   2010                               30     33                24
##   2011                               30     33                24
##   2012                               31     34                25
##   2013                               31     34                26
##   2014                               31     34                26
##   2015                               32     34                26
##   2016                               32     34                26
##   2017                               32     34                27
##   2018                               32     34                27
##   2019                               32     34                27
##   2020                               31     32                26
##   2021                               31     33                26
##   2022                               32     33                27
##       
##        West Nile virus infection
##   2003                         0
##   2004                         0
##   2005                         0
##   2006                         0
##   2007                         0
##   2008                        29
##   2009                        28
##   2010                        28
##   2011                        30
##   2012                        35
##   2013                        36
##   2014                        36
##   2015                        36
##   2016                        36
##   2017                        37
##   2018                        38
##   2019                        38
##   2020                        35
##   2021                        36
##   2022                        36

6 Loading Adagio database

# Reading ADAGIO database
adg <- read.csv(file= "Outbreaks and cases with labels.csv")
str(adg)
## 'data.frame':    3670 obs. of  9 variables:
##  $ Disease        : chr  "African swine fever virus" "African swine fever virus" "African swine fever virus" "African swine fever virus" ...
##  $ FunctionalGroup: chr  "Wildlife amplification" "Wildlife amplification" "Wildlife amplification" "Wildlife amplification" ...
##  $ Species        : chr  "Wild Boar" "Wild Boar" "Wild Boar" "Wild Boar" ...
##  $ Country        : chr  "Poland" "Poland" "Poland" "Poland" ...
##  $ NUTS2          : chr  "Warmińsko-Mazurskie" "Warmińsko-Mazurskie" "Warmińsko-Mazurskie" "Warmińsko-Mazurskie" ...
##  $ Year           : int  2019 2019 2019 2018 2019 2019 2019 2019 2019 2019 ...
##  $ Month          : chr  "November" "October" "February" "November" ...
##  $ SumCases       : int  168 97 102 93 38 71 105 130 53 58 ...
##  $ SumOutbreaks   : int  122 87 60 50 37 49 62 87 45 42 ...
adg %>% 
  filter(Disease == "West Nile Fever") %>% 
  group_by(Country, FunctionalGroup,Species, Year) %>% 
  summarize(cases=sum(SumCases))
## `summarise()` has grouped output by 'Country', 'FunctionalGroup', 'Species'.
## You can override using the `.groups` argument.
## # A tibble: 130 × 5
## # Groups:   Country, FunctionalGroup, Species [66]
##    Country   FunctionalGroup        Species                 Year cases
##    <chr>     <chr>                  <chr>                  <int> <int>
##  1 Algeria   Domestic spillover     Domesticated Equids     2022    10
##  2 Algeria   Domestic spillover     Domesticated Equids     2023     6
##  3 Argentina Domestic spillover     Domesticated Equids     2006     3
##  4 Austria   Domestic spillover     Domesticated Equids     2019     4
##  5 Austria   Domestic spillover     Domesticated Equids     2020     2
##  6 Austria   Domestic spillover     Domesticated Equids     2022     1
##  7 Austria   Domestic spillover     Domesticated Equids     2023     1
##  8 Austria   Wildlife amplification Carrion Crow            2022     1
##  9 Austria   Wildlife amplification Eastern Imperial Eagle  2022     1
## 10 Austria   Wildlife amplification Eurasian Eagle-Owl      2023     1
## # ℹ 120 more rows
adg %>% 
  filter(Disease == "West Nile Fever") %>% 
  # group_by(Country, FunctionalGroup,Species, Year) %>% 
  filter(Species != "Domesticated Equids") %>%
  summarize(cases=sum(SumCases, na.rm = TRUE))
##   cases
## 1   816
# filtering cases by country without equids
adg %>% 
  filter(Disease == "West Nile Fever") %>% 
  filter(Species != "Domesticated Equids") %>%
  group_by(Country) %>% 
  summarize(cases=sum(SumCases))
## # A tibble: 10 × 2
##    Country                cases
##    <chr>                  <int>
##  1 Austria                    3
##  2 Bosnia and Herzegovina     2
##  3 Bulgaria                   2
##  4 France                    NA
##  5 Germany                  288
##  6 Greece                    50
##  7 Madagascar               418
##  8 North Macedonia           36
##  9 Slovenia                   1
## 10 Spain                      1
# West Nile animals cases to match human cases
adg2  <- adg %>% 
  filter(Species != "Domesticated Equids") %>%
  mutate(Disease = gsub("West Nile Fever", "West Nile virus infection", Disease)) %>% 
  # filter(Disease == "West Nile virus infection") %>% 
  group_by(Country, Disease, Year) %>% 
  summarize(cases=sum(SumCases))
## `summarise()` has grouped output by 'Country', 'Disease'. You can override
## using the `.groups` argument.
adg2
## # A tibble: 241 × 4
## # Groups:   Country, Disease [72]
##    Country    Disease                    Year cases
##    <chr>      <chr>                     <int> <int>
##  1 Armenia    African swine fever virus  2007   773
##  2 Armenia    African swine fever virus  2010    37
##  3 Armenia    African swine fever virus  2011    22
##  4 Austria    West Nile virus infection  2022     2
##  5 Austria    West Nile virus infection  2023     1
##  6 Azerbaijan African swine fever virus  2008    NA
##  7 Belarus    African swine fever virus  2013    27
##  8 Belgium    African swine fever virus  2018   156
##  9 Belgium    African swine fever virus  2019   543
## 10 Belgium    African swine fever virus  2020     3
## # ℹ 231 more rows
sum(adg2$cases, na.rm = TRUE) #1720-915  #total 814 withouth equids
## [1] 2331451

7 Loading Human population

hpop <- read_csv("API_SP.POP.TOTL_DS2_en_csv_v2_6508519.csv", skip = 3)
## New names:
## Rows: 266 Columns: 68
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): Country Name, Country Code, Indicator Name, Indicator Code dbl (63): 1960,
## 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, ... lgl (1): ...68
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...68`
hpop$`Country Name` <- gsub("Kosovo*","Kosovo", hpop$`Country Name`)

colnames(hpop)
##  [1] "Country Name"   "Country Code"   "Indicator Name" "Indicator Code"
##  [5] "1960"           "1961"           "1962"           "1963"          
##  [9] "1964"           "1965"           "1966"           "1967"          
## [13] "1968"           "1969"           "1970"           "1971"          
## [17] "1972"           "1973"           "1974"           "1975"          
## [21] "1976"           "1977"           "1978"           "1979"          
## [25] "1980"           "1981"           "1982"           "1983"          
## [29] "1984"           "1985"           "1986"           "1987"          
## [33] "1988"           "1989"           "1990"           "1991"          
## [37] "1992"           "1993"           "1994"           "1995"          
## [41] "1996"           "1997"           "1998"           "1999"          
## [45] "2000"           "2001"           "2002"           "2003"          
## [49] "2004"           "2005"           "2006"           "2007"          
## [53] "2008"           "2009"           "2010"           "2011"          
## [57] "2012"           "2013"           "2014"           "2015"          
## [61] "2016"           "2017"           "2018"           "2019"          
## [65] "2020"           "2021"           "2022"           "...68"
hpop2 <- hpop[,c(1,48:66)]
hpop2 <- melt(hpop2, id = 1)
## Warning in melt(hpop2, id = 1): The melt generic in data.table has been passed
## a tbl_df and will attempt to redirect to the relevant reshape2 method; please
## note that reshape2 is deprecated, and this redirection is now deprecated as
## well. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace like
## reshape2::melt(hpop2). In the next version, this warning will become an error.
colnames(hpop2)
## [1] "Country Name" "variable"     "value"

8 Matrix with human population over the years

# Create matrix
db <-expand.grid(unique(hpop$`Country Name`), c(2003:2023), unique(soe1$disease))

colnames(db) <- c("country", "time", "disease")

# First index to pass the animal cases db & adg
index <- match(paste(db$country, db$time, db$disease), paste(adg2$Country, adg2$Year, adg2$Disease))
db$animal_c <- adg2$cases[index]

# Second index to pass the human cases db & soe1
index <- match(paste(db$country, db$time, db$disease), paste(soe1$country, soe1$time, soe1$disease))
db$human_c <- soe1$cases[index]

unique(db$disease)
## [1] Crimean-Congo haemorrhagic fever Rabies                          
## [3] Rift valley fever                West Nile virus infection       
## 4 Levels: Crimean-Congo haemorrhagic fever Rabies ... West Nile virus infection
unique(soe1$disease)
## [1] "Crimean-Congo haemorrhagic fever" "Rabies"                          
## [3] "Rift valley fever"                "West Nile virus infection"
# third index to pass the human pop
index <- match(paste(db$country, db$time), paste(hpop2$`Country Name`, hpop2$variable))
db$human_pop <- hpop2$value[index]

db %>% 
  group_by(disease) %>%
  summarise(casesAnimal=sum(animal_c, na.rm = TRUE),
            casesHuman=sum(human_c, na.rm = TRUE))
## # A tibble: 4 × 3
##   disease                          casesAnimal casesHuman
##   <fct>                                  <int>      <dbl>
## 1 Crimean-Congo haemorrhagic fever           0         87
## 2 Rabies                                     0         25
## 3 Rift valley fever                          0         17
## 4 West Nile virus infection                814       6832

8.1 Human cases / 100.000 habitants

db$human_cases_hab <- db$human_c/db$human_pop*100000

9 Reading the UN regions

unregions <- read.csv("all.csv")
unregions$name[unregions$name == "United States of America"] <- "United States"
unregions$name[unregions$name == "United Kingdom of Great Britain and Northern Ireland"] <- "United Kingdom"

str(unregions)
## 'data.frame':    249 obs. of  11 variables:
##  $ name                    : chr  "Afghanistan" "Ã…land Islands" "Albania" "Algeria" ...
##  $ alpha.2                 : chr  "AF" "AX" "AL" "DZ" ...
##  $ alpha.3                 : chr  "AFG" "ALA" "ALB" "DZA" ...
##  $ country.code            : int  4 248 8 12 16 20 24 660 10 28 ...
##  $ iso_3166.2              : chr  "ISO 3166-2:AF" "ISO 3166-2:AX" "ISO 3166-2:AL" "ISO 3166-2:DZ" ...
##  $ region                  : chr  "Asia" "Europe" "Europe" "Africa" ...
##  $ sub.region              : chr  "Southern Asia" "Northern Europe" "Southern Europe" "Northern Africa" ...
##  $ intermediate.region     : chr  "" "" "" "" ...
##  $ region.code             : int  142 150 150 2 9 150 2 19 NA 19 ...
##  $ sub.region.code         : int  34 154 39 15 61 39 202 419 NA 419 ...
##  $ intermediate.region.code: int  NA NA NA NA NA NA 17 29 NA 29 ...
str(db)
## 'data.frame':    22344 obs. of  7 variables:
##  $ country        : Factor w/ 266 levels "Aruba","Africa Eastern and Southern",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ time           : int  2003 2003 2003 2003 2003 2003 2003 2003 2003 2003 ...
##  $ disease        : Factor w/ 4 levels "Crimean-Congo haemorrhagic fever",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ animal_c       : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ human_c        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ human_pop      : num  9.27e+04 4.34e+08 2.26e+07 2.93e+08 1.81e+07 ...
##  $ human_cases_hab: num  NA NA NA NA NA NA NA NA NA NA ...
##  - attr(*, "out.attrs")=List of 2
##   ..$ dim     : int [1:3] 266 21 4
##   ..$ dimnames:List of 3
##   .. ..$ Var1: chr [1:266] "Var1=Aruba" "Var1=Africa Eastern and Southern" "Var1=Afghanistan" "Var1=Africa Western and Central" ...
##   .. ..$ Var2: chr [1:21] "Var2=2003" "Var2=2004" "Var2=2005" "Var2=2006" ...
##   .. ..$ Var3: chr [1:4] "Var3=Crimean-Congo haemorrhagic fever" "Var3=Rabies" "Var3=Rift valley fever" "Var3=West Nile virus infection"
db$country <- as.character(db$country)
db <- data.frame(db)
db$unr <- unregions$sub.region[match(db$country, unregions$name)]

10 Update H1N1 - RVF - JE FAO report

so <- read.csv(file="Latest Reported Events.csv") #spill-over: so
so1 <- read.csv(file="Rift Valley fever (all).csv") #spill-over: so

so$Serotype <- NULL
so1$Location <- NULL

colnames(so)[3] <- "lat"
colnames(so)[4] <- "lon"
colnames(so1)
##  [1] "Event.ID"         "Disease"          "lat"              "lon"             
##  [5] "Locality"         "Country"          "Region"           "observation.date"
##  [9] "report.date"      "Date"             "Animal.Type"      "Animal.Species"  
## [13] "Species"          "Diagnosis.Status" "Humans.Affected"  "Human.Deaths"    
## [17] "Diagnosis.Source"
so1 <- so1[, c(1:9,13,17,15,16,14)]
so <- rbind(so, so1)

so$Country[so$Country == "U.K. of Great Britain and Northern Ireland"] <- "United Kingdom"
so$Country[so$Country == "United States of America"] <- "United States"


so$year <- year(ymd(substr(so$report.date, 1,10)) )

# Region distribution for cases (lira interest)
fao <- so %>% 
  filter(!is.na(Humans.Affected)) %>% 
  filter(Disease == "Influenza - Avian" |
           Disease == "Rabies" | 
           Disease == "Rift Valley fever") %>% 
  group_by(country=Country, time=year, disease=Disease) %>% 
  summarise(human_c=sum(Humans.Affected))
## `summarise()` has grouped output by 'country', 'time'. You can override using
## the `.groups` argument.
fao_animal <- so %>% 
  filter(is.na(Humans.Affected)) %>% 
  filter(Disease == "Influenza - Avian" |
           Disease == "Rabies" | 
           Disease == "Rift Valley fever") %>% 
  group_by(country=Country, time=year, disease=Disease) %>% 
  summarise(animal_c=n())
## `summarise()` has grouped output by 'country', 'time'. You can override using
## the `.groups` argument.
fao$animal_c <- fao_animal$animal_c[match(paste(fao$country, fao$time, fao$disease), paste(fao_animal$country, fao_animal$time, fao_animal$disease))]


fao <- fao[,c(1,2,3,5,4)]


colnames(db)
## [1] "country"         "time"            "disease"         "animal_c"       
## [5] "human_c"         "human_pop"       "human_cases_hab" "unr"
colnames(fao)
## [1] "country"  "time"     "disease"  "animal_c" "human_c"
# Completing with population
fao$human_pop <- hpop2$value[match(paste(fao$country, fao$time-2), paste(hpop2$`Country Name`, hpop2$variable))]
fao$human_cases_hab <- fao$human_c/fao$human_pop*100000

fao$unr <- unregions$sub.region[match(fao$country, unregions$name)]

# Japanese encephalitis
# https://www.who.int/data/gho/data/indicators/indicator-details/GHO/japanese-encephalitis---number-of-reported-cases
so2 <- read.csv(file="FAO_JE.csv") #spill-over: so

str(so2)
## 'data.frame':    1390 obs. of  34 variables:
##  $ IndicatorCode             : chr  "WHS3_42" "WHS3_42" "WHS3_42" "WHS3_42" ...
##  $ Indicator                 : chr  "Japanese encephalitis - number of reported cases" "Japanese encephalitis - number of reported cases" "Japanese encephalitis - number of reported cases" "Japanese encephalitis - number of reported cases" ...
##  $ ValueType                 : chr  "numeric" "numeric" "numeric" "numeric" ...
##  $ ParentLocationCode        : chr  "AFR" "EMR" "SEAR" "AFR" ...
##  $ ParentLocation            : chr  "Africa" "Eastern Mediterranean" "South-East Asia" "Africa" ...
##  $ Location.type             : chr  "Country" "Country" "Country" "Country" ...
##  $ SpatialDimValueCode       : chr  "DZA" "BHR" "BTN" "BWA" ...
##  $ Location                  : chr  "Algeria" "Bahrain" "Bhutan" "Botswana" ...
##  $ Period.type               : chr  "Year" "Year" "Year" "Year" ...
##  $ Period                    : int  2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
##  $ IsLatestYear              : chr  "true" "true" "true" "true" ...
##  $ Dim1.type                 : logi  NA NA NA NA NA NA ...
##  $ Dim1                      : logi  NA NA NA NA NA NA ...
##  $ Dim1ValueCode             : logi  NA NA NA NA NA NA ...
##  $ Dim2.type                 : logi  NA NA NA NA NA NA ...
##  $ Dim2                      : logi  NA NA NA NA NA NA ...
##  $ Dim2ValueCode             : logi  NA NA NA NA NA NA ...
##  $ Dim3.type                 : logi  NA NA NA NA NA NA ...
##  $ Dim3                      : logi  NA NA NA NA NA NA ...
##  $ Dim3ValueCode             : logi  NA NA NA NA NA NA ...
##  $ DataSourceDimValueCode    : logi  NA NA NA NA NA NA ...
##  $ DataSource                : logi  NA NA NA NA NA NA ...
##  $ FactValueNumericPrefix    : logi  NA NA NA NA NA NA ...
##  $ FactValueNumeric          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FactValueUoM              : logi  NA NA NA NA NA NA ...
##  $ FactValueNumericLowPrefix : logi  NA NA NA NA NA NA ...
##  $ FactValueNumericLow       : logi  NA NA NA NA NA NA ...
##  $ FactValueNumericHighPrefix: logi  NA NA NA NA NA NA ...
##  $ FactValueNumericHigh      : logi  NA NA NA NA NA NA ...
##  $ Value                     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FactValueTranslationID    : logi  NA NA NA NA NA NA ...
##  $ FactComments              : logi  NA NA NA NA NA NA ...
##  $ Language                  : chr  "EN" "EN" "EN" "EN" ...
##  $ DateModified              : chr  "2023-07-10T22:00:00.000Z" "2023-07-10T22:00:00.000Z" "2023-07-10T22:00:00.000Z" "2023-07-10T22:00:00.000Z" ...
so3 <- so2 %>% 
  mutate(disease=gsub("Japanese encephalitis - number of reported cases", "Japanese encephalitis", Indicator)) %>%
  mutate(animal_c= "0") %>% 
  filter(Value != 0) %>% 
  group_by(country=Location, time=Period, disease, animal_c) %>% 
  summarise(human_c=sum(Value))
## `summarise()` has grouped output by 'country', 'time', 'disease'. You can
## override using the `.groups` argument.
# Completing with population
so3$human_pop <- hpop2$value[match(paste(so3$country, so3$time), paste(hpop2$`Country Name`, hpop2$variable))]

so3$human_cases_hab <- so3$human_c/so3$human_pop*100000

so3$unr <- unregions$sub.region[match(so3$country, unregions$name)]

colnames(db)
## [1] "country"         "time"            "disease"         "animal_c"       
## [5] "human_c"         "human_pop"       "human_cases_hab" "unr"
colnames(so3)
## [1] "country"         "time"            "disease"         "animal_c"       
## [5] "human_c"         "human_pop"       "human_cases_hab" "unr"
db <- rbind(db, fao, so3)

11 General view on rate x 100.000 hab

db %>% 
  filter(time > 2003) %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(time, human_cases_hab, group=country))+
  geom_point()

db %>% 
  filter(time > 2003) %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(time, human_cases_hab, group=country))+
  geom_point()+
  scale_y_log10()+
  facet_wrap(vars(disease), ncol = 1)

## West Nile Virus

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "West Nile virus infection") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, color=country))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(country), ncol = 1)+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("WNV human cases / 100.000 habitants")+
  ylab(NULL) + 
  guides(y="none")

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "West Nile virus infection") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, time, group=time))+
  geom_boxplot()+
  scale_x_log10()+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("WNV human cases / 100.000 habitants (boxplot grouped by year)")+
  ylab(NULL)

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "West Nile virus infection") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(time, human_cases_hab, color=country))+
  geom_point()+
  geom_line(size=1)+
  scale_y_log10()+
  facet_wrap(vars(country), ncol = 3)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("WNF human cases / 100.000 habitants (by year-country)")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

11.1 Rabies

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Rabies") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, time, group=time))+
  geom_boxplot()+
  scale_x_log10()+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("Rb human cases / 100.000 habitants (boxplot grouped by year)")+
  ylab(NULL)

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Rabies") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, color=country))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(country), ncol = 1)+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("Rb human cases / 100.000 habitants")+
  ylab(NULL) + 
  guides(y="none")

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Rabies") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(time, human_cases_hab, color=country))+
  geom_point()+
  geom_line(size=1)+
  scale_y_log10()+
  facet_wrap(vars(country), ncol = 3)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("Rb human cases / 100.000 habitants (by year-country)")
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

11.2 Rift valley fever

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Rift valley fever") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, time, group=time))+
  geom_boxplot()+
  scale_x_log10()+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("RVF human cases / 100.000 habitants (boxplot grouped by year)")+
  ylab(NULL)

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Rift valley fever") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, color=country))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(country), ncol = 1)+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("RVF human cases / 100.000 habitants")+
  ylab(NULL) + 
  guides(y="none")

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Rift valley fever") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(time, human_cases_hab, color=country))+
  geom_point()+
  geom_line(size=1)+
  scale_y_log10()+
  facet_wrap(vars(country), ncol = 3)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("RVF human cases / 100.000 habitants (by year-country)")

11.3 Crimean-Congo haemorrhagic fever

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Crimean-Congo haemorrhagic fever") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, time, group=time))+
  geom_boxplot()+
  scale_x_log10()+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("CCHF human cases / 100.000 habitants (boxplot grouped by year)")+
  ylab(NULL)

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Crimean-Congo haemorrhagic fever") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, color=country))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(country), ncol = 1)+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("CCHF human cases / 100.000 habitants")+
  ylab(NULL) + 
  guides(y="none")

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Crimean-Congo haemorrhagic fever") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(time, human_cases_hab, color=country))+
  geom_point()+
  geom_line(size=1)+
  scale_y_log10()+
  facet_wrap(vars(country), ncol = 3)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("CCHF human cases / 100.000 habitants (by year-country)")
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## Influenza - Avian

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Influenza - Avian") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, time, group=time))+
  geom_boxplot()+
  scale_x_log10()+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("CCHF human cases / 100.000 habitants (boxplot grouped by year)")+
  ylab(NULL)

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Influenza - Avian") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, color=country))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(country), ncol = 1)+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("CCHF human cases / 100.000 habitants")+
  ylab(NULL) + 
  guides(y="none")

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Influenza - Avian") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(time, human_cases_hab, color=country))+
  geom_point()+
  geom_line(size=1)+
  scale_y_log10()+
  facet_wrap(vars(country), ncol = 3)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("CCHF human cases / 100.000 habitants (by year-country)")
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

11.4 Japanese encephalitis

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Japanese encephalitis") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, time, group=time))+
  geom_boxplot()+
  scale_x_log10()+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("CCHF human cases / 100.000 habitants (boxplot grouped by year)")+
  ylab(NULL)

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Japanese encephalitis") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, color=country))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(country), ncol = 1)+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("CCHF human cases / 100.000 habitants")+
  ylab(NULL) + 
  guides(y="none")

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Japanese encephalitis") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(time, human_cases_hab, color=country))+
  geom_point()+
  geom_line(size=1)+
  scale_y_log10()+
  facet_wrap(vars(country), ncol = 3)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("CCHF human cases / 100.000 habitants (by year-country)")
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

# Ordering number of cases to check patterns

wnf <- db %>% 
  group_by(country, disease) %>% 
  filter(time > 2003) %>% 
  filter(!is.na(human_cases_hab)) %>% 
  summarise(mean_cases=mean(human_cases_hab)) %>% 
  # mutate(level=cut(mean_cases, breaks = 3, labels = c(1,2,3))) %>% 
  mutate(level=cut(mean_cases, breaks = 3)) %>% 
  arrange(desc(level))
## `summarise()` has grouped output by 'country'. You can override using the
## `.groups` argument.
dbw <- db %>% 
  filter(time > 2003) %>% 
  filter(disease == "West Nile virus infection") %>% 
  filter(!is.na(human_cases_hab))

dbw$order <- wnf$mean_cases[match(dbw$country, wnf$country)]
dbw$country <- reorder(dbw$country, dbw$order)
 
dbw %>% 
  filter(disease == "West Nile virus infection") %>% 
  ggplot(aes(time, human_cases_hab, color=country))+
  geom_point()+
  geom_line(size=0.1)+
  scale_y_log10()+
  facet_wrap(vars(country), nrow = 3)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("WNF human cases / 100.000 habitants (by year-country) ordered")
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

11.5 Rabies ordering

wnf <- db %>% 
  group_by(country) %>% 
  filter(time > 2003) %>% 
  filter(disease == "Rabies") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  summarise(mean_cases=mean(human_cases_hab)) %>% 
  # mutate(level=cut(mean_cases, breaks = 3, labels = c(1,2,3))) %>% 
  mutate(level=cut(mean_cases, breaks = 3)) %>% 
arrange(desc(level))


dbw <- db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Rabies") %>% 
  filter(!is.na(human_cases_hab))

dbw$order <- wnf$mean_cases[match(dbw$country, wnf$country)]

dbw$country <- reorder(dbw$country, dbw$order)
 

  ggplot(dbw,aes(time, human_cases_hab, color=country))+
  geom_point()+
  geom_line(size=0.1)+
  scale_y_log10()+
  facet_wrap(vars(country), nrow = 3)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("RB human cases / 100.000 habitants country ordered by the mean rate")
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

12 General view of diseases spread

db %>% 
  filter(time > 2003) %>% 
  # filter(disease == "Rabies") %>% 
  # filter(unr != "Western Asia") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, time, group=time, color=disease))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(disease), ncol = 1)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("human cases/100.000 habitants (log scaled) country rates")

db %>% 
  filter(time > 2003 & !is.na(human_cases_hab & !is.na(disease))) %>% 
  # filter(disease == "Rabies") %>% 
  # filter(!is.na(human_cases_hab)) %>% 
  # filter(unr != "Western Asia") %>%
  ggplot(aes(human_cases_hab, unr, group=unr, color=disease))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(disease), scales="free", ncol = 1)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("human cases/100.000 habitants (log scaled) country rates ")

# Calculating cut points and levels by disease

level <- db %>% 
  filter(time > 2003) %>%
  # filter(!is.na(unr)) %>% 
  filter(!is.na(human_cases_hab)) %>% 
  group_by(disease) %>% 
  mutate(name=cut(human_cases_hab, breaks = 3, labels = c("low","medium","high"))) %>% 
  mutate(ranges=cut(human_cases_hab, breaks = 3)) %>% 
  select(-unr, -animal_c)

level2 <- level %>%  
  group_by(disease,name,ranges) %>% 
  dplyr::summarize(mean_point=median(human_cases_hab),
                   human_cases_hab_unr=(sum(human_c)/sum(human_pop*100000)),
                   mean2=mean(human_cases_hab_unr))
## `summarise()` has grouped output by 'disease', 'name'. You can override using
## the `.groups` argument.
# other option please consider and improve not yet usable because the breaks is static and we need to be disease specific
to_break <- db %>% 
  filter(time > 2003) %>%
  filter(!is.na(unr)) %>% 
  filter(!is.na(human_cases_hab))
breaks <- quantile(to_break$human_cases_hab, c(0,0.25,0.75,1))
breaks
##           0%          25%          75%         100% 
## 0.0003016486 0.0098634451 0.1882437148 5.9433414812
level <- to_break %>% 
  group_by(disease,) %>% 
  mutate(name=cut(human_cases_hab, breaks = breaks, labels = c("low","medium","high"))) %>% 
  mutate(ranges=cut(human_cases_hab, breaks = breaks, right = FALSE)) %>% 
  mutate(name2=cut(human_cases_hab, breaks = 3, labels = c("low","medium","high"))) %>% 
  mutate(ranges2=cut(human_cases_hab, breaks = 3, right = FALSE)) %>% 
  select(-unr, -animal_c)

13 Using UN regions

13.1 West Nile Virus

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "West Nile virus infection") %>% 
  filter(!is.na(human_cases_hab)) %>% 
   filter(!is.na(unr)) %>% 
  ggplot(aes(human_cases_hab, color=unr))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(unr), ncol = 1)+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("WNV human cases / 100.000 habitants")+
  ylab(NULL) + 
  guides(y="none")

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "West Nile virus infection") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  filter(!is.na(unr)) %>% 
  ggplot(aes(human_cases_hab, time, group=time))+
  geom_boxplot()+
  scale_x_log10()+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("WNV human cases / 100.000 habitants (boxplot grouped by year)")+
  ylab(NULL)

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "West Nile virus infection") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  filter(!is.na(unr)) %>% 
  ggplot(aes(time, human_cases_hab, color=unr))+
  geom_point()+
  geom_line(size=1)+
  scale_y_log10()+
  facet_wrap(vars(unr), ncol = 3)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("WNF human cases / 100.000 habitants (by year-country)")

13.2 Rabies

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Rabies") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, time, group=time))+
  geom_boxplot()+
  scale_x_log10()+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("Rb human cases / 100.000 habitants (boxplot grouped by year)")+
  ylab(NULL)

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Rabies") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, color=unr))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(unr), ncol = 1)+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("Rb human cases / 100.000 habitants")+
  ylab(NULL) + 
  guides(y="none")

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Rabies") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(time, human_cases_hab, color=unr))+
  geom_point()+
  geom_line(size=1)+
  scale_y_log10()+
  facet_wrap(vars(unr), ncol = 3)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("Rb human cases / 100.000 habitants (by year-country)")
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

13.3 Rift valley fever

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Rift valley fever") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, time, group=time))+
  geom_boxplot()+
  scale_x_log10()+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("RVF human cases / 100.000 habitants (boxplot grouped by year)")+
  ylab(NULL)

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Rift valley fever") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, color=unr))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(unr), ncol = 1)+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("RVF human cases / 100.000 habitants")+
  ylab(NULL) + 
  guides(y="none")

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Rift valley fever") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(time, human_cases_hab, color=unr))+
  geom_point()+
  geom_line(size=1)+
  scale_y_log10()+
  facet_wrap(vars(unr), ncol = 3)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("RVF human cases / 100.000 habitants (by year-country)")

13.4 Crimean-Congo haemorrhagic fever

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Crimean-Congo haemorrhagic fever") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, time, group=time))+
  geom_boxplot()+
  scale_x_log10()+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("CCHF human cases / 100.000 habitants (boxplot grouped by year)")+
  ylab(NULL)

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Crimean-Congo haemorrhagic fever") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, color=unr))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(unr), ncol = 1)+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("CCHF human cases / 100.000 habitants")+
  ylab(NULL) + 
  guides(y="none")

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Crimean-Congo haemorrhagic fever") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(time, human_cases_hab, color=unr))+
  geom_point()+
  geom_line(size=1)+
  scale_y_log10()+
  facet_wrap(vars(unr), ncol = 3)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("CCHF human cases / 100.000 habitants (by year-unr)")
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

13.5 Influenza - Avian

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Influenza - Avian") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, time, group=time))+
  geom_boxplot()+
  scale_x_log10()+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("CCHF human cases / 100.000 habitants (boxplot grouped by year)")+
  ylab(NULL)

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Influenza - Avian") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, color=unr))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(unr), ncol = 1)+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("CCHF human cases / 100.000 habitants")+
  ylab(NULL) + 
  guides(y="none")

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Influenza - Avian") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(time, human_cases_hab, color=unr))+
  geom_point()+
  geom_line(size=1)+
  scale_y_log10()+
  facet_wrap(vars(unr), ncol = 3)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("CCHF human cases / 100.000 habitants (by year-unr)")
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

## Japanese encephalitis

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Japanese encephalitis") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, time, group=time))+
  geom_boxplot()+
  scale_x_log10()+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("CCHF human cases / 100.000 habitants (boxplot grouped by year)")+
  ylab(NULL)

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Japanese encephalitis") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, color=unr))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(unr), ncol = 1)+
  theme_minimal() +
  theme(legend.position="none")+
  xlab("CCHF human cases / 100.000 habitants")+
  ylab(NULL) + 
  guides(y="none")

db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Japanese encephalitis") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(time, human_cases_hab, color=unr))+
  geom_point()+
  geom_line(size=1)+
  scale_y_log10()+
  facet_wrap(vars(unr), ncol = 3)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("CCHF human cases / 100.000 habitants (by year-unr)")
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

# Ordering to see patterns ## WNV

wnf <- db %>% 
  group_by(unr, disease) %>% 
  filter(time > 2003) %>% 
  filter(!is.na(human_cases_hab)) %>% 
  summarise(mean_cases=mean(human_cases_hab)) %>% 
  # mutate(level=cut(mean_cases, breaks = 3, labels = c(1,2,3))) %>% 
  mutate(level=cut(mean_cases, breaks = 3)) %>% 
  arrange(desc(level))
## `summarise()` has grouped output by 'unr'. You can override using the `.groups`
## argument.
dbw <- db %>% 
  filter(time > 2003) %>% 
  filter(disease == "West Nile virus infection") %>% 
  filter(!is.na(human_cases_hab))

dbw$order <- wnf$mean_cases[match(dbw$unr, wnf$unr)]
dbw$unr <- reorder(dbw$unr, dbw$order)
 
dbw %>% 
  filter(disease == "West Nile virus infection") %>% 
  ggplot(aes(time, human_cases_hab, color=unr))+
  geom_point()+
  geom_line(size=0.1)+
  scale_y_log10()+
  facet_wrap(vars(unr), nrow = 3)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("WNF human cases / 100.000 habitants (by year-UNregion) ordered")

13.6 Rabies

wnf <- db %>% 
  group_by(unr) %>% 
  filter(time > 2003) %>% 
  filter(disease == "Rabies") %>% 
  filter(!is.na(human_cases_hab)) %>% 
  summarise(mean_cases=mean(human_cases_hab)) %>% 
  # mutate(level=cut(mean_cases, breaks = 3, labels = c(1,2,3))) %>% 
  mutate(level=cut(mean_cases, breaks = 3)) %>% 
arrange(desc(level))


dbw <- db %>% 
  filter(time > 2003) %>% 
  filter(disease == "Rabies") %>% 
  filter(!is.na(human_cases_hab))

dbw$order <- wnf$mean_cases[match(dbw$unr, wnf$unr)]

dbw$unr <- reorder(dbw$unr, dbw$order)
 

  ggplot(dbw,aes(time, human_cases_hab, color=unr))+
  geom_point()+
  geom_line(size=0.1)+
  scale_y_log10()+
  facet_wrap(vars(unr), nrow = 3)+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("RB human cases / 100.000 habitants UNregion ordered by the mean rate")
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

14 General view of the rate by UNRegions

UN_db <- db %>% 
  group_by(disease, unr, time) %>% 
  filter(time > 2003) %>% 
  filter(unr != "Western Asia") %>% 
  filter(!is.na(human_cases_hab)) %>%
  summarise(human_cases_hab_unr=sum(human_c)/sum(human_pop)*100000)
## `summarise()` has grouped output by 'disease', 'unr'. You can override using
## the `.groups` argument.
datatable(UN_db)
ggplotly(UN_db %>% 
  filter(time > 2003) %>% 
  # filter(disease == "Rabies") %>% 
  filter(!is.na(human_cases_hab_unr)) %>% 
  ggplot(aes(human_cases_hab_unr, unr, group=unr, color=disease))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(disease), scales = "free", ncol=1) +
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("human cases / 100.000 habitants (boxplots of UNRegion rates) log scaled "))

15 Calculating cut points and levels by disease and UNregions

UNRegion_db <- db %>% 
  group_by(disease, unr) %>% 
  filter(time > 2003) %>% 
  filter(unr != "Western Asia") %>% 
  filter(!is.na(human_cases_hab)) %>%
  summarise(human_cases_hab_unr=(sum(human_c)/sum(human_pop)*100000)) %>% 
  mutate(name=cut(human_cases_hab_unr, breaks = 3, labels = c("low","medium","high"))) %>% 
  mutate(range=cut(human_cases_hab_unr, breaks = 3))
## `summarise()` has grouped output by 'disease'. You can override using the
## `.groups` argument.
UNRegion_db
## # A tibble: 32 × 5
## # Groups:   disease [7]
##    disease                          unr          human_cases_hab_unr name  range
##    <fct>                            <chr>                      <dbl> <fct> <fct>
##  1 Crimean-Congo haemorrhagic fever Eastern Eur…             0.0725  high  (0.0…
##  2 Crimean-Congo haemorrhagic fever Northern Eu…             0.00156 low   (0.0…
##  3 Crimean-Congo haemorrhagic fever Southern Eu…             0.00524 low   (0.0…
##  4 Crimean-Congo haemorrhagic fever Western Eur…             0.00244 low   (0.0…
##  5 Rabies                           Eastern Eur…             0.00616 high  (0.0…
##  6 Rabies                           Northern Eu…             0.00337 medi… (0.0…
##  7 Rabies                           South-easte…             0.00517 high  (0.0…
##  8 Rabies                           Southern Eu…             0.00244 low   (0.0…
##  9 Rabies                           Western Eur…             0.00194 low   (0.0…
## 10 Rift valley fever                Northern Eu…             0.00156 low   (0.0…
## # ℹ 22 more rows
UNRegion_db <- db %>% 
  group_by(disease) %>% 
  filter(time > 2003) %>% 
  filter(unr != "Western Asia") %>% 
  filter(!is.na(human_cases_hab)) %>%
  summarise(human_cases_hab_unr=(sum(human_c)/sum(human_pop)*100000)) %>% 
  mutate(name=cut(human_cases_hab_unr, breaks = 3, labels = c("low","medium","high"))) %>% 
  mutate(range=cut(human_cases_hab_unr, breaks = 3))
UNRegion_db
## # A tibble: 7 × 4
##   disease                          human_cases_hab_unr name   range           
##   <fct>                                          <dbl> <fct>  <fct>           
## 1 Crimean-Congo haemorrhagic fever             0.0162  low    (0.00152,0.0546]
## 2 Rabies                                       0.00296 low    (0.00152,0.0546]
## 3 Rift valley fever                            0.00370 low    (0.00152,0.0546]
## 4 West Nile virus infection                    0.160   high   (0.107,0.161]   
## 5 Influenza - Avian                            0.00168 low    (0.00152,0.0546]
## 6 Rift Valley fever                            0.104   medium (0.0546,0.107]  
## 7 Japanese encephalitis                        0.135   high   (0.107,0.161]

16 General cut points by disease

db %>% 
  filter(time > 2003) %>% 
  filter(!is.na(human_cases_hab)) %>% 
  ggplot(aes(human_cases_hab, color=disease, group=unr))+
  geom_boxplot()+
  scale_x_log10()+
  facet_wrap(vars(disease, unr))+
  theme_minimal() +
  theme(legend.position="none") +
  ylab(NULL)+
  xlab("human cases / 100.000 habitants (agregated by UNRegions and years)")

17 Lira table - General cut points by disease values

# lira_table_sp <- db %>%
#   filter(time > 2003) %>%
#   # filter(unr != "Western Asia") %>% 
#   filter(!is.na(human_cases_hab)) %>%
#   group_by(disease, UNregions=unr) %>%
#   summarise(
#     human_cases=sum(human_c),
#     human_pop=sum(human_pop),
#     # animal_cases=sum(animal_c),
#     human_cases_hab_UNR=(sum(human_c)/sum(human_pop)*100000),
#     CI05 = quantile(human_cases_hab, probs = 0.05, na.rm = TRUE),
#     CI95 = quantile(human_cases_hab, probs = 0.95, na.rm = TRUE)) %>%
#   mutate(level=cut(human_cases_hab_UNR, breaks = 3, labels = c("low","medium","high"))) %>% 
#   arrange(disease, desc(human_cases_hab_UNR), UNregions)
# 
# lira_table_sp
# write.csv(lira_table_sp, file="lira_table_sp.csv")
lira_table_sp <- db %>%
  filter(time > 2003) %>%
  # filter(unr != "Western Asia") %>% 
  filter(!is.na(human_cases_hab)) %>%
  group_by(disease, UNregions=unr) %>%
  summarise(
    human_cases=sum(human_c),
    human_pop=sum(human_pop),
    human_rate_mean=round(mean(human_cases_hab), 4),
    CI05 = round(quantile(human_cases_hab, probs = 0.05, na.rm = TRUE), 4),
    CI95 = round(quantile(human_cases_hab, probs = 0.95, na.rm = TRUE) , 4)) %>%
  mutate(level=cut(human_rate_mean, breaks = 3, labels = c("low","medium","high"))) %>% 
  arrange(disease, desc(human_rate_mean), UNregions)
## `summarise()` has grouped output by 'disease'. You can override using the
## `.groups` argument.
datatable(lira_table_sp)
lira_table_sp2 <- db %>%
  filter(time > 2003) %>%
  # filter(unr != "Western Asia") %>% 
  filter(!is.na(human_cases_hab)) %>%
  group_by(disease, UNregions=unr) %>%
  summarise(
    human_cases=sum(human_c),
    human_pop=sum(human_pop),
    human_rate_median=round(median(human_cases_hab), 4),
    CI05 = round(quantile(human_cases_hab, probs = 0.05, na.rm = TRUE), 4),
    CI95 = round(quantile(human_cases_hab, probs = 0.95, na.rm = TRUE) ,4)) %>%
  mutate(level=cut(human_rate_median, breaks = 3, labels = c("low","medium","high"))) %>% 
  arrange(disease, desc(human_rate_median), UNregions)
## `summarise()` has grouped output by 'disease'. You can override using the
## `.groups` argument.
datatable(lira_table_sp2)

18 Credits Acosta, Alfredo PhD1.

SVA1: SVA http://www.sva.se/.